#' PhenoGraph clustering
#' 
#' R implementation of the PhenoGraph algorithm
#' 
#' A custom R implementation of the PhenoGraph (http://www.cell.com/cell/abstract/S0092-8674(15)00637-6) algorithm, 
#' which is a clustering method designed for high-dimensional single-cell data analysis. It works by creating a graph ("network") representing 
#' phenotypic similarities between cells by calclating the Jaccard coefficient between nearest-neighbor sets, and then identifying communities 
#' using the well known Louvain method (https://sites.google.com/site/findcommunities/) in this graph. 
#' 
#' That version used PCA or LSA reduced meta-cells and multithreading annoy version for K-nn search (from uwot package).  
#' 
#' @param data list; Input data (gficf object)
#' @param from.embedded logical; Use embeddedd (UMAP or tSNA) space for clustering cells. Best results are usually obtained not using the embedded space.
#' @param k integer; number of nearest neighbours (default:15)
#' @param dist.method character; Dist to use for K-nn. Type of distance metric to use to find nearest neighbors. One of:
#' \itemize{
#'   \item \code{"euclidean"} (the default)
#'   \item \code{"cosine"}
#'   \item \code{"manhattan"}
#'   \item \code{"hamming"} (very slow)
#' }
#' @param  nt integer; Number of cpus to use for k-nn search
#' @param community.algo characthers; Community algorithm to use for clustering. Supported are:
#' \itemize{
#'   \item \code{"louvain"} (the default, the original louvain method)
#'   \item \code{"louvain 2"} (louvain with modularity optimization from Seurat)
#'   \item \code{"louvain 3"} (Louvain algorithm with multilevel refinement from Seurat)
#'   \item \code{"leiden"} (Leiden algorithm see Traag et al. 2019)
#'   \item \code{"walktrap"}
#'   \item \code{"fastgreedy"}
#' }
#' @param store.graph logical; Store produced phenograph in the gficf object
#' @param seed integer; Seed to use for replication.
#' @param verbose logical; Increase verbosity.
#' @param resolution Value of the resolution parameter, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities (used only for leiden and louvain 2 or 3 methods).
#' @param n.start Number of random starts (used only for louvain 2 or 3 methods).
#' @param n.iter Maximal number of iterations per random start (used only for louvain 2 or 3 methods).
#' @return the updated gficf object
#' @importFrom  igraph graph.data.frame simplify cluster_louvain walktrap.community fastgreedy.community membership as_adj
#' @importFrom RcppParallel setThreadOptions RcppParallelLibs
#' @importFrom reticulate py_module_available
#' @importFrom leiden leiden
#' @import uwot
#' @import Matrix
#' @export
clustcells <- function(data,from.embedded=F,k=15,dist.method="manhattan",nt=2,community.algo="louvain 3",store.graph=T,seed=180582,verbose=TRUE, resolution = 0.25, n.start = 50, n.iter = 250)
{
  community.algo = base::match.arg(arg = community.algo,choices = c("louvain","louvain 2","louvain 3","walktrap","fastgreedy","leiden"),several.ok = F)
  
  if (is.null(data$embedded)) {stop("Run first runReduction function")}
  set.seed(seed)
  tsmessage("Finding Neighboors..",verbose = verbose)
  
  if (from.embedded)
  {
    if(is.null(data$embedded)) {stop("First run runReduction to embed your cells")}
    neigh = uwot:::find_nn(as.matrix(data$embedded[,c(1,2)]),k=k+1,include_self = T,n_threads = nt,verbose = TRUE,method = "annoy",metric=dist.method)$idx
  } else {
    if(is.null(data$pca)) {stop("First run runPCA or runLSA to reduce dimensionality")}
    neigh = uwot:::find_nn(data$pca$cells,k=k+1,include_self = T,n_threads = nt,verbose = verbose,method = "annoy",metric=dist.method)$idx
  }
  
  neigh = neigh[,-1]
  RcppParallel::setThreadOptions(numThreads = nt)
  relations <- rcpp_parallel_jaccard_coef(neigh,verbose)
  relations <- relations[relations[,3]>0, ]
  relations <- as.data.frame(relations)
  colnames(relations)<- c("from","to","weight")
  g <- igraph::graph.data.frame(relations, directed=FALSE)
  rm(relations,neigh);gc()
  
  if (community.algo=="louvain")
  {
    tsmessage("Performing louvain...",verbose = verbose)
    community <- igraph::cluster_louvain(g)
  }
  
  if (community.algo=="louvain 2")
  {
    tsmessage("Performing louvain with modularity optimization...",verbose = verbose)
    community <- RunModularityClustering(igraph::as_adjacency_matrix(g,attr = "weight",sparse = T),1,resolution,1,n.start,n.iter,seed,verbose)
  }
  
  if (community.algo=="louvain 3")
  {
    tsmessage("Performing louvain with modularity optimization...",verbose = verbose)
    community <- RunModularityClustering(igraph::as_adjacency_matrix(g,attr = "weight",sparse = T),1,resolution,2,n.start,n.iter,seed,verbose)
  }
  
  if (community.algo=="walktrap")
  {
    tsmessage("Performing walktrap...",verbose = verbose)
    community <- igraph::walktrap.community(g)
  } 
  if (community.algo=="fastgreedy") {
    tsmessage("Performing fastgreedy...",verbose = verbose)
    community <- igraph::fastgreedy.community(g)
  }
  
  if (community.algo=="leiden")
  {
    if (!(reticulate::py_module_available("leidenalg") && reticulate::py_module_available("igraph")))
      stop("Cannot find Leiden algorithm, please install through pip (e.g. sudo -H pip install leidenalg igraph).")
    
    tsmessage("Performing leiden...",verbose = verbose)
    community <- leiden::leiden(object = g,resolution_parameter=resolution)
  }
  
  
  if(community.algo %in% c("louvain 2","louvain 3","leiden")) {
    if(community.algo %in% c("louvain 2","louvain 3")) {community = community + 1}
    data$embedded$cluster = as.character(community)
  } else {
    data$embedded$cluster <- as.character(igraph::membership(community))
  }
  
  if (store.graph) {data$community=community;data$cell.graph=g} else {data$community=community}
  
  # get centroid of clusters
  tsmessage("Computing Cluster Signatures...",verbose = verbose)
  cluster.map = data$embedded$cluster
  u = base::unique(cluster.map)
  data$cluster.gene.rnk = base::sapply(u, function(x,y=data$gficf,z=cluster.map) armaRowSum(y[,z%in%x]))
  
  tsmessage(paste("Detected Clusters:",length(unique(data$embedded$cluster))),verbose = verbose)
  
  return(data)
}

# Runs the modularity optimizer (C++ function from seurat package https://github.com/satijalab/seurat)
#
# @param SNN SNN matrix to use as input for the clustering algorithms
# @param modularity Modularity function to use in clustering (1 = standard; 2 = alternative)
# @param resolution Value of the resolution parameter, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities
# @param algorithm Algorithm for modularity optimization (1 = original Louvain algorithm; 2 = Louvain algorithm with multilevel refinement; 3 = SLM algorithm; 4 = Leiden algorithm). Leiden requires the leidenalg python module.
# @param n.start Number of random starts
# @param n.iter Maximal number of iterations per random start
# @param random.seed Seed of the random number generator
# @param print.output Whether or not to print output to the console
# @param temp.file.location Deprecated and no longer used
# @param edge.file.name Path to edge file to use
#
# @return clusters
#
#' @importFrom utils read.table write.table
#
RunModularityClustering <- function(SNN = matrix(), modularity = 1, resolution = 0.8, algorithm = 1, n.start = 10, n.iter = 10, random.seed = 0, print.output = TRUE, temp.file.location = NULL, edge.file.name = "") 
{
  clusters <- RunModularityClusteringCpp(SNN,modularity,resolution,algorithm,n.start,n.iter,random.seed,print.output,edge.file.name)
  return(clusters)
}


#' Cell clustering by pathway's activity
#' 
#' Cell clustering using pathway expression instead of gene expression
#' 
#' Cells are clustered using the estimated pathway activity levels by scGSEA() function.
#' Cells are clustered either by using PhenoGraph algorithm or hierarchical clustering. In the case of PhenoGraph method is used
#' identified clusters with louvain method are stored into data$embedded$cluster.by.scGSEA while the prduced graph into data$scgsea$cell.graph. 
#' In case hierarchical clusteringis used the resulting dendogram is stored into data$scgsea$h.dendo.
#' 
#' 
#' @param data list; Input data (gficf object)
#' @param method character; Method used to produce the cell network, such as phenograph or fgraph.
#' @param pca numeric; If different from NULL data are reduced using pca component before to apply clustering algorithm.
#' @param k integer; number of nearest neighbours (default:10).
#' @param resolution Value of the resolution parameter, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities.
#' @param n.start Number of random starts.
#' @param n.iter Maximal number of iterations per random start.
#' @param nt integer; Number of cpus to use for k-nn search. If zero all cpu are used.
#' @param seed integer; Seed to use for replication.
#' @param verbose logical; Increase verbosity.
#' @return the updated gficf object
#' @importFrom  igraph graph.data.frame graph.adjacency
#' @importFrom RcppParallel setThreadOptions RcppParallelLibs
#' @import uwot
#' @importFrom irlba irlba
#' @import Matrix
#' @export
clustcellsBYscGSEA <- function(data,method="fgraph",pca=NULL,k=10, resolution = 0.25, n.start = 50, n.iter = 250, nt=0, verbose=T, seed=180582)
{
  if (is.null(data$scgsea)) {stop("Run first runScGSEA function")}
  method = base::match.arg(arg = method,choices = c("phenograph","fgraph"),several.ok = F)
  
  nt=detectCores()
  
  if (method=="phenograph") {
    x = data$scgsea$x
    
    tsmessage("Finding Neighboors..",verbose = verbose)
    
    if (!is.null(pca)) {
      x <- irlba::irlba(A = x,nv=pca,center = Matrix::rowMeans(t(x)))
      x <- x$u %*% diag(x$d)
      rownames(x) <- rownames(data$scgsea$x)
    }
    
    if (ncol(x)>100) {
      neigh = uwot:::find_nn(x,k=k,include_self = F,n_threads = nt,verbose = verbose,method = "annoy",metric="manhattan",n_trees = 100)$idx
    } else {
      neigh = uwot:::find_nn(x,k=k,include_self = F,n_threads = nt,verbose = verbose,method = "fnn",metric="manhattan")$idx
    }
    rm(x)
    
    RcppParallel::setThreadOptions(numThreads = nt)
    relations <- rcpp_parallel_jaccard_coef(neigh,verbose)
    relations <- relations[relations[,3]>0, ]
    relations <- as.data.frame(relations)
    colnames(relations)<- c("from","to","weight")
    g <- igraph::graph.data.frame(relations, directed=FALSE)
    rm(relations,neigh);gc()
    
    tsmessage("Performing louvain with modularity optimization...",verbose = verbose)
    community <- RunModularityClustering(igraph::as_adjacency_matrix(g,attr = "weight",sparse = T),1,resolution,2,n.start,n.iter,seed,verbose)
    community = community + 1
    data$embedded$cluster.by.scGSEA = as.character(community)
    data$scgsea$cell.graph=g; rm(g)
    
    # get centroid of clusters
    tsmessage("Computing Cluster Signatures...",verbose = verbose)
    cluster.map = data$embedded$cluster.by.scGSEA
    u = base::unique(cluster.map)
    data$scgsea$cluster.gsea.mu = base::sapply(u, function(x,y=t(data$scgsea$x),z=cluster.map) Matrix::rowMeans(y[,z%in%x]))
    tsmessage(paste("Detected Clusters:",length(unique(data$embedded$cluster))),verbose = verbose)
  }
  
  if (method=="fgraph") {
    g = uwot::umap(X = as.matrix(data$scgsea$x),n_neighbors = k,metric = "manhattan",scale = "Z",n_trees = 100,pca = pca,ret_extra = "fgraph",n_threads = nt,verbose = verbose)$fgraph
    g <- igraph::graph.adjacency(adjmatrix = g, mode = "undirected",weighted = TRUE,diag = F)
    
    tsmessage("Performing louvain with modularity optimization...",verbose = verbose)
    community <- RunModularityClustering(igraph::as_adjacency_matrix(g,attr = "weight",sparse = T),1,resolution,2,n.start,n.iter,seed,verbose)
    community = community + 1
    data$embedded$cluster.by.scGSEA = as.character(community)
    data$scgsea$cell.graph=g; rm(g)
    
    # get centroid of clusters
    tsmessage("Computing Cluster Signatures...",verbose = verbose)
    cluster.map = data$embedded$cluster.by.scGSEA
    u = base::unique(cluster.map)
    data$scgsea$cluster.gsea.mu = base::sapply(u, function(x,y=t(data$scgsea$x),z=cluster.map) Matrix::rowMeans(y[,z%in%x]))
    tsmessage(paste("Detected Clusters:",length(unique(data$embedded$cluster))),verbose = verbose)
  }
  
  gc()
  return(data)
}
